
library(tidyverse)
library(readxl)
library(RSQLite)
library(glue)
library(scales)
library(lubridate)
library(haven)
library(humaniformat)
# Table II ------

## ... Load Data ----

### Am Law Data

amlaw200 = read_excel("Data/amlaw200_2009_through_2019.xlsx") %>%
  #very strange non-standard spacing on one organization Eckert
  mutate(`Organization Name`=str_replace_all(`Organization Name`,"\\s"," "))

### DF Meeting Data

meetings = read_dta("Data/FedMeetings_STATAraw.dta") %>%
  #eliminate some capitalization inconsistencies between Dan's stata file and Brian's Excel
  rename(Organization=organization,
         Individual=individual,
         Surname=surname)

### OpenSecrets Data

db = dbConnect(SQLite(),dbname="Data/opensecretslobbying_2021-06-09.sqlite")

query = "
select 
  year,registrant,sum(amount) as total_income
from 
  lobbying
where 
  Isfirm is not NULL
  and
  type in ('q1','q2','q3','q4') and
  use='y'
group by
  year,registrant
order by 
  total_income desc
"

annual.income.lobbying.orgs = db %>% tbl(sql(query)) %>% collect()


## .... Produce Aggregates ----

### find the top lobbying firms by revenue
top.lobbying.firms = annual.income.lobbying.orgs %>%
  filter(Year>=2010,Year<=2017) %>%
  group_by(Year) %>%
  arrange(desc(total_income)) %>%
  mutate(year_rank=row_number()) %>%
  group_by(Registrant) %>%
  summarise(highest_rank_lobbying_firm_by_revenue=min(year_rank),
            lowest_rank_lobbying_firm_by_revenue=max(year_rank),
            max_lobbying_revenue=max(total_income),
            min_lobbying_revenue=min(total_income)) %>%
  filter(highest_rank_lobbying_firm_by_revenue<=5,lowest_rank_lobbying_firm_by_revenue<10)


### similar calculation for amlaw 

amlaw200$orgname_no_llp = str_replace(amlaw200$`Organization Name`,"LLP","") %>% 
  trimws(whitespace="[ \t\r\n,]")

amlaw_200_summarized = amlaw200 %>%
  filter(`Fiscal Year`>=2010,`Fiscal Year`<=2017) %>%
  group_by(Year=`Fiscal Year`) %>%
  arrange(desc(`Gross Revenue`)) %>%
  mutate(year_rank=row_number()) %>%
  group_by(`orgname_no_llp`) %>%
  summarise(highest_rank_law_firm_by_revenue=min(year_rank),
            lowest_rank_law_firm_by_revenue=max(year_rank),
            min_PPP=min(PPP),
            max_PPP=max(PPP),
            max_revenue=max(`Gross Revenue`),
            min_revenue=min(`Gross Revenue`))


### similar calculation for DF Meetings
meetings_by_org = meetings %>%
  select(Organization,url) %>%
  unique() %>%
  group_by(Organization) %>%
  count() 

## ... Merge Data ----

#combine law and lobbying
crosswalk=read_csv("Data/ALM_AMLAW_CRP_crosswalk_partial.csv") %>%
  select(-...4)

top.lobbying.firms.with.law.data = top.lobbying.firms %>%
  left_join(crosswalk,by="Registrant") %>%
  left_join(amlaw_200_summarized,by=c(`Organization Name`="orgname_no_llp")) %>% 
  arrange(desc(max_lobbying_revenue))

#manual addition fastest for these firms
top.lobbying.firms.with.law.data$dodd.frank.meetings=c(3,1,0,0,0)

# output this data for the table
write.csv(top.lobbying.firms.with.law.data,"Tables/table2_top_lobby_part.csv")

# clean up the names for these top dogs
top.meeting.firms = meetings_by_org %>% 
  arrange(desc(n)) %>%
  filter(Organization %in% c("Sullivan & Cromwell",
                             'Debevoise & Plimpton LLP',
                             'David Polk & Wardwell LLP',
                             'Cleary, Gottlieb, Steen and Hamilton LLP',
                             'Morrison & Foerster, LLP'))

top.meeting.firms$Organization[2] ='Debevoise & Plimpton'
top.meeting.firms$Organization[3] ='Davis Polk & Wardwell'
top.meeting.firms$Organization[4] ='Cleary Gottlieb Steen & Hamilton'
top.meeting.firms$Organization[5] ='Morrison & Foerster'


#now let's add the lobbying for these top firms
lobbying.summaries.for.table= annual.income.lobbying.orgs %>%
  filter(Year>=2010,Year<=2017) %>%
  group_by(Year) %>%
  arrange(desc(total_income)) %>%
  mutate(year_rank=row_number()) %>%
  group_by(Registrant) %>%
  summarise(highest_rank_lobbying_firm_by_revenue=min(year_rank),
            lowest_rank_lobbying_firm_by_revenue=max(year_rank),
            max_lobbying_revenue=max(total_income),
            min_lobbying_revenue=min(total_income))

top.meeting.firms.with.lobbying.and.law.data = top.meeting.firms %>%
  left_join(crosswalk,by=c(Organization="Organization Name")) %>%
  left_join(lobbying.summaries.for.table,by="Registrant") %>%
  left_join(amlaw_200_summarized,by=c(`Organization`="orgname_no_llp")) 

# output this data for the table
write.csv(top.meeting.firms.with.lobbying.and.law.data,"Tables/table2_top_law_part.csv")

# reviewer suggests a more scattered approach

lobbying.firms.of.interest = c("Davis, Polk & Wardwell",
                      "Sullivan & Cromwell",
                      "Debevoise & Plimpton",
                      "Cleary, Gottlieb et al",
                      "Morrison & Foerster",
                      "Patton Boggs LLP",
                      "Squire Patton Boggs",
                      "Akin, Gump et al",
                      "Brownstein, Hyatt et al",
                      "Holland & Knight",
                      "K&L Gates")


scatter.materials.lobbying = annual.income.lobbying.orgs %>% 
  filter(Year>=2010,Year<=2017) %>%
  group_by(Year) %>%
  arrange(desc(total_income)) %>%
  mutate(year_rank=row_number()) %>%
  filter(Registrant %in% lobbying.firms.of.interest) %>%
  arrange(Year)


law.firms.of.interest = c("Davis Polk & Wardwell",
                          "Sullivan & Cromwell",
                          "Debevoise & Plimpton",
                          "Cleary Gottlieb Steen & Hamilton",
                          "Morrison & Foerster",
                          "Patton Boggs",
                          "Squire Patton Boggs",
                          "Akin Gump Strauss Hauer & Feld",
                          "Brownstein Hyatt Farber Schreck",
                          "Holland & Knight",
                          "K&L Gates")

scatter.materials.lawyering = amlaw200 %>%
  filter(orgname_no_llp %in% law.firms.of.interest) %>%
  select(Year=`Fiscal Year`,`Rank By PPP`,orgname_no_llp)

adhoc.crosswalk = data.frame(lobbyname=lobbying.firms.of.interest,
                             lawname=law.firms.of.interest,
                             type=factor(c(1,1,1,1,1,2,2,2,2,2,2),labels=c("Regulatory","Reported")),
                             common_name=c("Davis Polk","Sullivan & Cromwell","Debevoise","Cleary","Morrison & Foerster","Patton Boggs","Patton Boggs","Akin Gump","Brownstein Hyatt",
                                           "Holland & Knight",
                                           "K&L Gates"))

 
scatter.materials = scatter.materials.lobbying %>%
  left_join(adhoc.crosswalk,by=c("Registrant"="lobbyname")) %>%
  left_join(scatter.materials.lawyering,by=c("lawname"="orgname_no_llp","Year"))
#note that we might have lost some observations for firm sthat didn't lobby

ggplot(scatter.materials,aes(x=`year_rank`,y=`Rank By PPP`,color=type)) + 
  geom_point()  + 
  scale_color_discrete("Firm Category") + 
  scale_y_reverse("Law Firm Profitability Rankings",breaks=c(1,20,50,100,200)) + 
  scale_x_reverse("Lobbying Firm Revenue Rankings",breaks=c(1,500,1000,1500)) +
  theme_bw() +
  ggtitle("Annual Rankings of Ten Prominent Firms") +
  theme(plot.title = element_text(hjust=0.5))


# another bigger version of the above

raw_crosswalk = read_excel("Data/alm_amlaw_crp_crosswalk_v2.xlsx",sheet = 2,skip=4) %>% 
  transmute(AMLawName=str_replace_all(AMLawName,"\\s"," "),
            Registrant=str_replace_all(Registrant,"\\s"," ")) %>%
  distinct()

#number of unique amlaw names
amlaw200 = read_csv("Data/amlaw200_v2.csv") %>%
  mutate(Year=FY) 

amlaw200 %>%
  select(`Firm Name`) %>%
  distinct() %>%
  nrow() # 557

x=raw_crosswalk %>% 
  filter(Registrant!='NA') %>%
  select(AMLawName) %>%
  distinct()# 404

y=raw_crosswalk %>% 
  filter(Registrant=='NA') %>%
  select(AMLawName) %>%
  distinct() # 153
  
  
scatterdata = amlaw200 %>%
  select(
    Year=`FY`,
    AMLawName=`Firm Name`,
         PPP=`Profits Per Equity Partner`) %>%
  left_join(raw_crosswalk,
            by = join_by(AMLawName),
            multiple="all") %>%
  left_join(annual.income.lobbying.orgs,by = join_by(Year, Registrant)) %>%
  group_by(Year,AMLawName) %>%
  summarise(PPP1=max(PPP),
            PPP2=mean(PPP), #These two should be the same, but it's a good check the merge is working as it seems
            total_income=sum(total_income,na.rm=T)) 

# scatterdata has 2200 observations
scatterdata %>% filter(PPP1!=PPP2) #no cases where we got redundant PPP1

dollars <- label_dollar(
  scale_cut = c(0, k = 1e3, m = 1e6, bn = 1e9, tn = 1e12)
)

big.reg.lob = c("Davis Polk & Wardwell",
  "Sullivan & Cromwell",
  "Debevoise & Plimpton",
  "Cleary Gottlieb Steen & Hamilton",
  "Morrison & Foerster")

big.lob = c("Patton Boggs",
  "Squire Patton Boggs",
  "Akin Gump Strauss Hauer & Feld",
  "Brownstein Hyatt Farber Schreck",
  "Holland & Knight",
  "K&L Gates")

scatter_data= scatterdata%>%
  filter(Year>=2010,Year<=2017) %>%
  #filter(total_income>0) %>%
  rowwise() %>%
  mutate(
    category = 
      case_when(`AMLawName` %in% big.reg.lob ~ "Regulatory Lobbying Firm",
              `AMLawName` %in% big.lob ~ "Reported Lobbying Firm",
              T ~ NA_character_)
  ) 


highlights = scatter_data %>% 
  group_by(category) %>%
  summarise(v=mean(PPP1)) %>% filter(!is.na(category))

g = ggplot(scatter_data,aes(x=PPP1,y=total_income)) + 
  geom_point(alpha=0.3)  + 
  geom_point(data=scatter_data %>%
               filter(!is.na(category)),
             aes(color=category,
                 shape=category),size=2.5) + 
  scale_color_discrete(NULL,
                       breaks=c("Regulatory Lobbying Firm","Reported Lobbying Firm"),
                       na.value="grey50")+
  scale_shape_manual(values=c("Regulatory Lobbying Firm"="triangle",
                              "Reported Lobbying Firm"="square")) + 
  scale_x_continuous("Profits Per Partner",
                     breaks=c(highlights$v,2e6,4e6,6e6),
                     labels=c(
                       "Group Average",
                       "Group Average",
                       dollars(2e6),
                       dollars(4e6),
                       dollars(6e6))
                     ) +
  scale_y_log10("Lobbying Revenue",labels=dollars) + 
  theme_bw() +
  theme(plot.title = element_text(hjust=0.5)) + 
  #ggtitle("Lobbying Revenues and Profits Per Partner in Am Law 200 (2010-2017)") + 
  coord_flip() + 
  geom_vline(data=highlights,aes(xintercept=v,color=category)) 


g=g + 
  guides(color=guide_legend(override.aes=list(shape=c(17,15))),
         shape="none")

#g + 
 # geom_vline(xintercept=(scatter_data %>% ungroup() %>%
#                           summarise(v=mean(PPP1)) %>% pull()))
sc = scatter_data %>% filter(AMLawName=='Sullivan & Cromwell')

labelloc = c(x=4.5e6,y=10**5.8)

plot_with_sullcrom = g + 
  annotate("text",y=10**5.85,x=labelloc["x"],label="Sullivan & Cromwell",hjust="left") + 
  ggtitle("")  +
  annotate("curve",
           xend=sc$PPP1,
           yend=sc$total_income,
           arrow = arrow(length = unit(2, "mm")),
           y=labelloc["y"],
           x=labelloc["x"])

ag = scatter_data %>% filter(str_detect(AMLawName,"Akin"))

labelloc2 = c(x=2.5e6,y=10**7.3)


plot_with_sc_akin = plot_with_sullcrom + 
  annotate("text",
           x=labelloc2["x"],
           y=labelloc2["y"],
           label="Akin Gump",
           hjust="right") + 
  annotate("curve",
           xend=ag$PPP1,
           yend=ag$total_income,
           arrow = arrow(length = unit(2, "mm")),
           curvature=-0.2,
           y=labelloc2["y"],
           x=labelloc2["x"])



# dpw = scatter_data %>% filter(str_detect(AMLawName,"Davis Polk"))
# labelloc3 = c(x=4.5e6,y=10**4)
# 
# plot_with_sc_akin_dpw = plot_with_sc_akin + 
#   annotate("curve",
#            xend=dpw$PPP1,
#            yend=dpw$total_income,
#            arrow = arrow(length = unit(2, "mm")),
#            y=labelloc3["y"],
#            x=labelloc3["x"])

ggsave("Figures/lobbyingrevenue_v_ppp.png",plot_with_sc_akin,width = 8.5,height = 4,units = "in",bg='white')
ggsave("Figures/lobbyingrevenue_v_ppp_beamer.png",plot_with_sc_akin,width = 9,height = 5,units = "in",bg='white')
ggsave("Figures/lobbyingrevenue_v_ppp.pdf",plot_with_sc_akin,width = 8.5,height = 4,units = "in",bg='white')
ggsave("Figures/lobbyingrevenue_v_ppp_beamer.pdf",plot_with_sc_akin,width = 9,height = 5,units = "in",bg='white')



 # ... Top participant focused stuff ----

top_meeting_people = meetings %>%  
  mutate(Organization=ifelse(Organization=='David Polk & Wardwell LLP',"Davis Polk & Wardwell LLP",Organization),
         Individual=ifelse(Individual=='Randall Guynn','Randall D. Guynn',Individual)) %>%
  group_by(Organization,Individual) %>% count() %>% arrange(desc(n))

# reference people
names = c("Janet L. Yellen","Sean Campbell","Daniel K. Tarullo","Jerome Powell")

top_meeting_people %>% filter(Individual %in% names) %>% write.csv("Tables/table2_reference_mention.csv")


orgs_of_interest = c('Sullivan & Cromwell',"Patton Boggs LLP","Akin Gump Strauss Hauer & Feld LLP","Brownstein Hyatt","Holland and Knight",
                     "Debevoise & Plimpton LLP","Davis Polk & Wardwell LLP","Cleary, Gottlieb, Steen and Hamilton LLP","Morrison & Foerster, LLP")


#the people we want to focus on
top_meeting_people_by_org = top_meeting_people %>% filter(Organization %in% orgs_of_interest) %>% group_by(Organization) %>% arrange(desc(n)) %>% top_n(1)


## now try to find these people in the lobbying database

lobbyists = db %>%
  tbl(sql("select distinct Lobbyist,Lobbyist_id from lobbyists")) %>%
  collect()
  
# Find these particular individuals 
## Rodgin Cohen
lobbyists %>% filter(str_detect(Lobbyist,"Rodgin")) #Y0000001648L
## Oliver Ireland
lobbyists %>% filter(str_detect(Lobbyist,"Ireland")) #Y0000041124L
## Derek M. Bush
lobbyists %>% filter(str_detect(Lobbyist,"Bush")) #Y0000024737L
## Randall D. Guynn
lobbyists %>% filter(str_detect(Lobbyist,"Guynn")) #Y0000038820L
## Paul Lee
lobbyists %>% filter(str_detect(Lobbyist,"Lee") & str_detect(Lobbyist,"Paul")) #Y0000000213L
## Carolyn Walsh
lobbyists %>% filter(str_detect(Lobbyist,"Walsh") & str_detect(Lobbyist,"Carolyn"))  #Y0000024109L
## Smith W. Davis
lobbyists %>% filter(str_detect(Lobbyist,"Davis")) #Y0000042092L

top_meeting_people_by_org$Lobbyist_id = poi =  c("Y0000001648L","Y0000041124L","Y0000024737L","Y0000038820L","Y0000000213L","Y0000024109L","Y0000042092L")

## Some alternatives

poi_activity = db %>%
  tbl("lobbyists") %>%
  filter(Lobbyist_id %in% poi) %>%
  collect()
  
poi_activity_summary = poi_activity %>% 
  group_by(Lobbyist_id) %>%
  summarise(LastYear=max(Year,na.rm=T),FirstYear=min(Year,na.rm=T),TotalYears=length(unique(Year)))

top_meeting_people_by_org %>%
  left_join(poi_activity_summary) %>%
  write.csv(.,"Tables/table2_top_meeting_people.csv")


# Lawyer Spending Stuff ----
draws = read_csv("Data/imputed_proportions.csv")
range=quantile(draws$y,c(.25,.75))
d = read_csv("Data/top5_firms.csv")

g = ggplot(data=d,aes(x=year)) + 
  geom_ribbon(aes(ymin=range["25%"]*BHCK4092,ymax=range["75%"]*BHCK4092),fill = "grey90") + 
  geom_line(data=d %>% filter(BHCK4141!=0),aes(y=BHCK4141,lty='Legal')) + 
  geom_line(data=d,inherit.aes = F,aes(x=year,y=total.lobbying.spend,lty='Lobbying')) + 
  geom_line(data=d,inherit.aes = F,aes(x=year,y=total_pac_donations,lty='PAC'))+ 
  scale_linetype("Expense") + 
  theme_bw() + 
  scale_y_log10(name='Reported Legal Expenses',labels=label_number_si(accuracy=0.1)) + 
  facet_wrap(~Ultorg,nrow=1) + 
  theme(legend.position = 'bottom')  +
  ggthemes::theme_tufte() + 
  xlim(2002,2018)  +
  xlab(NULL)

ggsave("Figures/bhc_expenses_raw.png",g,width = 8.5,height = 4,units = "in",bg='white')
ggsave("Figures/bhc_expenses_raw.svg",g,width = 8.5,height = 4,units = "in",bg='white')
ggsave("Figures/bhc_expenses_raw.pdf",g,width = 8.5,height = 4,units = "in",bg='white')

# Lobbying by Sector ----

# an example, try to pull the securities firms
securities = db %>% 
  tbl("crp_categories") %>%
  filter(Catorder=='F07') %>%
  pull(Catcode)

# using the industry table gives answer inconsistent with website
# although not far off
# https://www.opensecrets.org/federal-lobbying/industries/summary?cycle=2020&id=F07
db %>% tbl("lobbying_industry") %>%
  filter(Catcode %in% securities) %>%
  group_by(Year) %>%
  summarise(sum(Total,na.rm=T)) %>%
  collect() %>%
  tail()

# the suggested resolution is like so
# https://groups.google.com/g/opensecrets-open-data/c/E2MGWhVdGdA/m/yYfYOqHxAwAJ
db %>%
  tbl("lobbying") %>%
  filter(use=='y',ind=='y',catcode %in% securities)  %>%
  group_by(Year) %>%
  summarise(sum(Amount)) %>%
  collect() 
# but it isn't consistent either, although it is close.

# so let's just do it this way
units = db %>% 
  tbl("crp_categories") %>%
  filter(Catcode %in% c("F0000","F1000","F1100")) %>%
  select(Catcode,Catname) %>%
  collect()


values = 
  bind_rows(
    db %>%
    tbl("lobbying_industry") %>%
    filter(Catcode %like% 'F%') %>%
    group_by(Year) %>%
    summarise(Total=sum(Total)) %>%
    collect() %>%
    pivot_wider(names_from=Year,values_from=Total) %>%
    mutate(Catcode='F0000') %>%
    select(Catcode,everything()),
    db %>%
      tbl("lobbying_industry") %>%
      filter(Catcode %like% 'F1%') %>%
      group_by(Year) %>%
      summarise(Total=sum(Total)) %>%
      collect() %>%
      pivot_wider(names_from=Year,values_from=Total) %>%
      mutate(Catcode='F1000') %>%
      select(Catcode,everything()),
    db %>%
      tbl("lobbying_industry") %>%
      filter(Catcode %like% 'F11%') %>%
      group_by(Year) %>%
      summarise(Total=sum(Total)) %>%
      collect() %>%
      pivot_wider(names_from=Year,values_from=Total) %>%
      mutate(Catcode='F1100') %>%
      select(Catcode,everything()))

lobbying_industry = db %>%
  tbl("lobbying_industry") %>%
  collect()
  
crp_categories = db %>% tbl("crp_categories")%>% collect()
    
db %>% tbl("crp_categories")%>% filter(Catorder %like% "F%")



sector_totals = lobbying_industry %>%
  left_join(crp_categories,by='Catcode') %>%
  filter(str_detect(Catorder,"F.+")) %>%
  group_by(Catcode=Catorder,Catname=Industry,Year) %>%
  summarise(Total=sum(Total)) %>%
  pivot_wider(names_from=Year,values_from=Total)

categories_of_interest = units %>%
  left_join(values)

summaries = bind_rows(categories_of_interest,sector_totals) 
summaries$AverageYearly = rowMeans(summaries[,-c(1,2)])
summaries %>% select(AverageYearly,everything()) %>%
  write.csv(x = .,file = "Tables/annual_lobbying_spending_by_industry_category.csv")


#% Table 1 stuff ----


meetings %>%
  filter(Surname=='Cohen',
         str_detect(Organization,'Sullivan'),
         str_detect(Individual,"Rodg")) %>%
  group_by(Y=year(date)) %>%
  count() %>%
  write.csv(x=.,file="Tables/fed_nprm_meetings_by_year.csv")



#% Table 4 stuff ----

# Assumes some code that produces a clean lobbyist table. 

lobbyist_table = read_csv("Data/lobbyist_table.csv")
#some random quantitites mentioned in the paper
lobbyist_table$has_profile %>% sum(na.rm=T) #Registered Lobbyists with public linkedin profile
lobbyist_table$has_credential %>% sum(na.rm=T) #Registered Lobbyists with public linkedin profile
lobbyist_table$has_law_credential %>% sum(na.rm=T) #Registered Lobbyists with public linkedin profile
lobbyist_table %>%
  filter(count_lobbying_reports_df_and_predecessors>0) %>%
  group_by(has_credential) %>%
  count() #Number of lobbyists on Dodd-Frank or predecessor with degree info


lobbyist_table %>%
  filter(count_lobbying_reports_df_and_predecessors>0) %>%
  filter(has_credential) %>%
  group_by(has_law_credential) %>%
  count() #Number of lobbyists on Dodd-Frank or predecessor with degree info

govt_or_private = read_csv("Data/government_org_or_not_companion_to_FedMeetings_STATAraw_dta.csv")

meetings %>%
  select(Individual,Organization) %>%
  distinct() %>%
  left_join(govt_or_private) %>%
  group_by(Government) %>%
  count() #People who met with FRB

lobbyist_table %>% group_by(met.with.fed) %>%count() #Number of FRB meeters who were lobbyists


#meetings %>% 
#  transmute(parse_names(Individual)) %>%
#  left_join(alm)

sink("Tables//Table4_raw.txt")

crp = dbConnect(SQLite(),dbname="Data/opensecretslobbying_2021-06-09.sqlite")

print("Number of distinct lobbyists in 2017")
crp %>%
  tbl(sql("select 
            count(distinct Lobbyist_id)
          from lobbyists where Year=2017")) %>% print()

print("Number of distinct lobbyists in 1998-2020")
crp %>%
  tbl(sql("select 
            count(distinct Lobbyist_id)
          from lobbyists where Year>=1998 and Year<=2020"))

print("Number of distinct registrants in 1998-2020")
crp %>% tbl(
  sql("select count(distinct Registrant) 
      from lobbying where  Year>=1998 and Year<=2020"))

print("Number of distinct registrants in 2017")
crp %>% tbl(
  sql("select count(distinct Registrant) 
      from lobbying where Year=2017"))


## DF stuff
page=xml2::read_html("https://www.llsdc.org/dodd-frank-legislative-history")
hrefs = xml2::xml_find_all(page,".//a[contains(@href,'congress.gov/bill')]/@href") %>% xml2::xml_text()
bills = str_match(hrefs,"(\\d+)th-congress\\/(\\w+)-bill\\/(\\d+)") %>% as_tibble() %>%
  select(-V1) %>%
  rename(congno=V2,body=V3,billno=V4) %>%
  rowwise() %>%
  mutate(B_ID=paste0(ifelse(body=='senate','s','hr'),billno,'-',congno)) %>%
  distinct()
b_ids=glue::glue_sql("{vals*}",vals=bills$B_ID,.con=crp)

library(glue)
query = glue(
  "with df_bills as (select B_ID,SI_ID,CongNo from lobbyingbills where B_ID in ({b_ids}))
  select Lobbyist_id,count(distinct Uniqid) as count_df_reports from df_bills
  left join lobbyingissues_withoutspecificissue using(SI_ID)
  left join lobbyists using(Uniqid)
  group by Lobbyist_id",
  b_ids=b_ids)

df_or_precursor_lobbyists = crp %>%
  tbl(sql(query)) %>%
  filter(!is.na(Lobbyist_id)) %>%
  collect()

print("Number of Lobbyists on a DF or related precursor bill")
print(df_or_precursor_lobbyists %>% nrow())

print("NUmber of attorneys in government or policy")
alm_compass_gov_policy = read_csv("Data/all_government_policy_names_parsed.csv")

print(alm_compass_gov_policy %>% nrow())
print("NUmber of attorneys in government or policy by title")
alm_compass_gov_policy %>%
  group_by(Title) %>%
  count() 

print("NUmber of attorneys in government or policy at partner level")
print(15896 + 873)
sink()


#%%% ALM MEETING MATCHES

meetings %>% 
  transmute(parse_names(Individual)) %>%
  select(first_name,last_name) %>%
  distinct() %>%
  left_join(alm_compass_gov_policy,by=c("first_name"="GivenName",
                                         "last_name"="Surname")) %>% 
  filter(!is.na(FirmName)) %>%
  select(first_name,last_name) %>%
  distinct() %>%
  nrow() #319

meetings %>% 
  transmute(parse_names(Individual)) %>%
  select(first_name,last_name) %>%
  distinct() %>%
  left_join(alm_compass_gov_policy,by=c("first_name"="GivenName",
                                        "last_name"="Surname")) %>% 
  filter(!is.na(FirmName)) %>%
  filter(str_detect(Title,"Partner")) %>%
  select(first_name,last_name) %>%
  distinct() %>%
  nrow() #235

meetings %>% 
  transmute(parse_names(Individual)) %>%
  select(first_name,last_name) %>%
  distinct() %>%
  left_join(alm_compass_gov_policy,by=c("first_name"="GivenName",
                                        "last_name"="Surname")) %>% 
  filter(!is.na(FirmName)) %>%
  filter(Title=='Partner; Firm Leadership') %>%
  select(first_name,last_name) %>%
  distinct() %>%
  nrow() #235

print("DONE!")

